library(dplyr)
library(readr)
data <- read_csv("CA_divorce.csv")
full <- read_csv("usa_00002.csv")
colnames(data) <- tolower(colnames(data))
colnames(full) <- tolower(colnames(full))
datacopy <- data
fullcopy <- full
datacopy$year <- as.factor(datacopy$year)
datacopy$sex <- as.factor(datacopy$sex)
levels(datacopy$sex) <- c("male", "female")
datacopy$marrno <- as.factor(datacopy$marrno)
datacopy$marst <- as.factor(datacopy$marst)
datacopy$age_interval <- cut(datacopy$age, c(15, 19, 29, 39, 49, 59, 69, 79, 89, 99), labels = c("10s", "20s", "30s", "40s", "50s", "60s", "70s", "80s", "90s"), include.lowest = TRUE)
levels(datacopy$marst) <- c("Married_spouse_present", "Married_spouse_absent", "Separated", "Divorced", "Widowed", "Single")
divorce <- datacopy %>% filter(marst == "Divorced")
divorce$age_interval <- cut(divorce$age, c(15, 19, 29, 39, 49, 59, 69, 79, 89, 99), labels = c("10s", "20s", "30s", "40s", "50s", "60s", "70s", "80s", "90s"), include.lowest = TRUE)
divorce$age_interval <- as.factor(divorce$age_interval)
yrmarr not important because it is correlated with “age”
fullcopy$year <- as.factor(fullcopy$year)
fullcopy$sex <- as.factor(fullcopy$sex)
levels(fullcopy$sex) <- c("male", "female")
fullcopy$marrno <- as.factor(fullcopy$marrno)
fullcopy$marst <- as.factor(fullcopy$marst)
levels(fullcopy$marst) <- c("Married_spouse_present", "Married_spouse_absent", "Separated", "Divorced", "Widowed", "Single")
divorcef <- fullcopy %>% filter(marst == "Divorced")
divorcef$age_interval <- cut(divorcef$age, c(15, 19, 29, 39, 49, 59, 69, 79, 89, 99), labels = c("10s", "20s", "30s", "40s", "50s", "60s", "70s", "80s", "90s"), include.lowest = TRUE)
divorcef$age_interval <- as.factor(divorcef$age_interval)
memory.limit(2000000)
## [1] 2e+06
newdf <- fullcopy %>% filter(year == 2019, statefip == 6)
newdf$age_interval <- cut(newdf$age, c(15, 19, 29, 39, 49, 59, 69, 79, 89, 99), labels = c("10s", "20s", "30s", "40s", "50s", "60s", "70s", "80s", "90s"), include.lowest = TRUE)
newdf$age_interval <- as.factor(newdf$age_interval)
teen <- newdf %>% filter(age_interval == "10s")
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
memory.limit(240000)
## Warning in memory.limit(240000): cannot decrease memory limit: ignored
## [1] 2e+06
nation <- divorcef %>% group_by(statefip, year) %>% summarise(count = n())
## `summarise()` regrouping output by 'statefip' (override with `.groups` argument)
#merged <- merge(divorcef, nation, by = c("statefip", "year"))
nation$statefip <- as.factor(nation$statefip)
levels(nation$statefip) <- c("AL","AK","AZ","AR","CA","CO","CT","DE","DC","FL","GA", "HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY")
fig <- plot_ly(type = "choropleth", locations = nation$statefip, locationmode = "USA-states", z = nation$count, scope = 'usa', frame = nation$year) %>%
layout(title = "2009-2019 US Divorce Count Trend", geo = list(
scope = 'usa',
projection = list(type = 'albers usa'),
showlakes = TRUE,
lakecolor = toRGB('white')
)) %>% style(hoverlabel = list(bgcolor = 'white'))
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: 'choropleth' objects don't have these attributes: 'scope'
## Valid attributes include:
## 'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'locations', 'locationmode', 'z', 'geojson', 'featureidkey', 'text', 'hovertext', 'marker', 'selected', 'unselected', 'hoverinfo', 'hovertemplate', 'showlegend', 'zauto', 'zmin', 'zmax', 'zmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'geo', 'idssrc', 'customdatasrc', 'metasrc', 'locationssrc', 'zsrc', 'textsrc', 'hovertextsrc', 'hoverinfosrc', 'hovertemplatesrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
fig
## Warning: 'choropleth' objects don't have these attributes: 'scope'
## Valid attributes include:
## 'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'locations', 'locationmode', 'z', 'geojson', 'featureidkey', 'text', 'hovertext', 'marker', 'selected', 'unselected', 'hoverinfo', 'hovertemplate', 'showlegend', 'zauto', 'zmin', 'zmax', 'zmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'geo', 'idssrc', 'customdatasrc', 'metasrc', 'locationssrc', 'zsrc', 'textsrc', 'hovertextsrc', 'hoverinfosrc', 'hovertemplatesrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
Sys.setenv("plotly_username" = "hanaylim")
Sys.setenv("plotly_api_key" = "ERfN7JPfQfV9QJlVnj0H")
api_create(fig, "Divorce in US 2019")
## Warning: 'choropleth' objects don't have these attributes: 'scope'
## Valid attributes include:
## 'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'locations', 'locationmode', 'z', 'geojson', 'featureidkey', 'text', 'hovertext', 'marker', 'selected', 'unselected', 'hoverinfo', 'hovertemplate', 'showlegend', 'zauto', 'zmin', 'zmax', 'zmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'geo', 'idssrc', 'customdatasrc', 'metasrc', 'locationssrc', 'zsrc', 'textsrc', 'hovertextsrc', 'hoverinfosrc', 'hovertemplatesrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Found a grid already named: 'Divorce in US 2019 Grid'. Since fileopt='overwrite', I'll try to update it
## Found a plot already named: 'Divorce in US 2019'. Since fileopt='overwrite', I'll try to update it
# https://plotly.com/~hanaylim/3/
Barely any changes, most divorce in California
Age groups of 50s and 60s get the most divorce. Females tend to request/get(?) divorce more than men do.
library(ggplot2)
#table(divorce$age) #7486, 7487
ggplot(divorce, aes(x = age_interval, fill = sex)) + geom_bar() +
geom_text(stat = "count", aes(label = ..count..), position = position_stack(vjust = 0.5)) +
scale_fill_manual(values = c("dodgerblue", "palevioletred")) +
ggtitle("Count of Divorce in California by Age Intervals") +
theme(plot.title = element_text(hjust = 0.5))
library(wesanderson)
library(plotly)
# memory.limit(size=120000)
divorcef %>% filter(statefip == 6) %>% group_by(year) %>% summarise(count = n()) %>% ggplot(aes(x = year, y = count, group = 1)) + geom_point() + geom_line() +
ggtitle("The divorce trend counts in California from 2009 - 2019")
## `summarise()` ungrouping output (override with `.groups` argument)
divorcef %>% filter(statefip == 6) %>% group_by(year, age_interval) %>% summarise(count = n()) %>% ggplot(aes(x = year, y = count, group = age_interval)) +
geom_point(aes(color = age_interval)) + geom_line(aes(color = age_interval)) +
ggtitle("The divorce trend counts in California from 2009 - 2019 by age groups")
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
divorcef %>% filter(statefip == 6, age_interval == "10s") %>% group_by(year) %>% summarise(count = n()) %>%
ggplot(aes(x = year, y = count, group = 1)) + geom_point() + geom_line() +
ggtitle("The divorce trend of the age 15 - 19 in California from 2009 - 2019")
## `summarise()` ungrouping output (override with `.groups` argument)
teendivorce <- divorcef %>% filter(statefip == 6, age_interval == "10s")
teendivorce$no_yr_marr <- as.numeric(as.character(teendivorce$year)) - teendivorce$yrmarr
teendivorce$no_yr_marr <- as.factor(teendivorce$no_yr_marr)
d <- teendivorce %>% group_by(no_yr_marr) %>% summarise(count = n()) %>% ungroup() %>% mutate(prop = count/sum(count))
## `summarise()` ungrouping output (override with `.groups` argument)
d$no_yr_marr <- as.factor(d$no_yr_marr)
# ggplot(d, aes(x = "", y = prop, fill = no_yr_marr)) + geom_bar(stat = "identity", color = "white", width = 1) +
# coord_polar("y") + theme_void() +
# geom_text(aes(label = round(prop, 3)*100), position = position_stack(vjust = 0.5)) +
# scale_fill_manual("Duration of marriage", values = wes_palette("Zissou1", n = 5)) +
# theme(axis.text = element_blank(), axis.ticks = element_blank(), panel.grid = element_blank()) +
# ggtitle("Marriage year duration for ages of 19 and under (%)")
pie <- plot_ly(d, labels = ~no_yr_marr, values = ~prop, type = 'pie',textposition = 'outside',textinfo = 'label+percent') %>%
layout(title = '2019 Duration of Teen Marriage in California (age 15-19) (%)',
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)) %>%
layout(legend=list(title=list(text='<b> Duration of Marriage </b>')))
pie
api_create(pie, "Duration of Teen marriage (age 15 - 19)")
## Found a grid already named: 'Duration of Teen marriage (age 15 - 19) Grid'. Since fileopt='overwrite', I'll try to update it
## Found a plot already named: 'Duration of Teen marriage (age 15 - 19)'. Since fileopt='overwrite', I'll try to update it
# https://chart-studio.plotly.com/~hanaylim/7/#/
teen %>% filter(marst != "Single") %>% group_by(marst) %>% summarise(count = n()) %>% mutate(prop = count / sum(count)) %>%
ggplot(aes(x = "", y = count, fill = marst)) + geom_bar(stat = "identity", width = 1, color = "white") +
coord_polar("y", start = 0) + theme_void() +
geom_text(aes(label = paste0(round(prop, 3)*100, "%")), position = position_stack(vjust = 0.5)) +
ggtitle("2019 Age group of 15-19 Marital Status in California")
## `summarise()` ungrouping output (override with `.groups` argument)
library(readxl)
library(dplyr)
divorce <- read_excel("SF_3_1_Marriage_divorce_rates.xlsx", sheet = "DivorceRate", skip = 3)
divorce <- divorce[-c(52:nrow(divorce)), -c(2:match("1991", names(divorce)))]
values <- suppressWarnings(as.data.frame(lapply(divorce[, -1], as.numeric), check.names = FALSE))
divorce <- cbind("Country" = divorce[,1], mutate(values, across(where(is.numeric), round, 3)))
divorce
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.4 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x dplyr::lag() masks stats::lag()
test <- divorce %>% gather(year, rate, `1992`:`2017`)
a <- test[-which(is.na(test$rate)), ]
library(ggplot2)
ggplot(data = a, aes(x = year, y = rate, fill = Country)) + geom_bar(stat = "identity") +
coord_flip() +
ggtitle("Overall Trend in Divorce Rates") +
theme(plot.title = element_text(size = 20, hjust = 0.5), legend.text = element_text(size = 9), legend.key.size = unit(0.01, 'cm')) + geom_text(size = 2.5, aes(label = rate), position = position_stack(vjust = 0.5))
# useful to see which countries have decreasing/increasing trends of divorce rates?
ggplot(a, aes(x = year, y = rate, group = Country)) + geom_point(aes(color = Country), show.legend = FALSE) +
geom_line(aes(color = Country), show.legend = FALSE) +
facet_wrap(~Country) +
theme(plot.title = element_text(size = 20, hjust = 0.5), axis.text.x = element_text(angle = 90, size = 7), strip.text.x = element_text(size = 12)) +
ggtitle("Trends of divorce rates in each country")
before <- divorce[, c(1, 2)]
before_divorce <- before[-which(is.na(before$`1992`)), ] %>% arrange(desc(`1992`))
before_divorce[1:10, ]
before_plot <- ggplot(before_divorce[1:10, ], aes(x = reorder(Country, `1992`), y = `1992`, fill = `1992`)) +
geom_bar(stat = "identity") +
coord_flip() +
xlab("Country") + ylab("Rates in 1992") +
geom_text(aes(label = `1992`), hjust = -0.15, size = 5) +
ggtitle("Top 10 countries with the highest divorce rates in year 1992") +
theme(plot.title = element_text(size = 20, hjust = 0.5),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_text(size = 13),
legend.text = element_text(size = 13),
legend.key.size = unit(1, 'cm')) +
scale_fill_continuous(name = "Divorce Rate")
ggplot(before_divorce[1:10, ], aes(x = reorder(Country, `1992`), y = `1992`, fill = `1992`)) +
geom_bar(stat = "identity") +
coord_flip() +
xlab("Country") + ylab("Rates in 1992") +
geom_text(aes(label = `1992`), hjust = -0.15) +
ggtitle("Top 10 countries with the highest divorce rates in year 1992") +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_continuous(name = "Divorce Rate")
library(cowplot)
recent <- divorce[, c(1, 27)]
recent_divorce <- recent[-which(is.na(recent$`2017`)), ] %>% arrange(desc(`2017`))
recent_divorce[1:10, ]
recent_plot <- ggplot(recent_divorce[1:10, ], aes(x = reorder(Country, `2017`), y = `2017`, fill = `2017`)) +
geom_bar(stat = "identity") +
coord_flip() +
xlab("Country") + ylab("Rates in 2017") +
geom_text(aes(label = `2017`), hjust = -0.15, size = 5) +
ggtitle("Top 10 countries with the highest divorce rates in year 2017") +
theme(plot.title = element_text(size = 20, hjust = 0.5),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 13),
legend.title = element_text(size = 13),
legend.key.size = unit(1, 'cm')) +
scale_fill_continuous(name = "Divorce Rate")
ggplot(recent_divorce[1:10, ], aes(x = reorder(Country, `2017`), y = `2017`, fill = `2017`)) +
geom_bar(stat = "identity") +
coord_flip() +
xlab("Country") + ylab("Rates in 2017") +
geom_text(aes(label = `2017`), hjust = -0.15) +
ggtitle("Top 10 countries with the highest divorce rates in year 2017") +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_continuous(name = "Divorce Rate")
plot_grid(before_plot, recent_plot)
is divorce related to the presence of children?